# Load the air-pollution data set; the first row of the CSV supplies the
# column names.
airpol <- read.csv(
  file = "~/GitHub/STA135/Homework/HW1/Air-Pollution Data G.C.Tao.csv",
  header = TRUE
)
library(ggplot2)
library(ggExtra)
## Warning: package 'ggExtra' was built under R version 4.1.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.1.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Pairwise plots: scatterplot matrix of every pair of columns in airpol
ggpairs(airpol)
# Marginal plots: scatterplot of Wind..x1. against each of the remaining
# columns, with a histogram of each variable drawn in the margins.
colnames <- names(airpol)
for (i in colnames[-1]) {
  # aes_string() is deprecated in ggplot2; map the columns through the .data
  # pronoun instead, and set the axis titles explicitly so they stay equal to
  # the column names.
  scatter <- ggplot(airpol, aes(x = .data[["Wind..x1."]], y = .data[[i]])) +
    geom_point(color = "firebrick") +
    labs(x = "Wind..x1.", y = i)
  print(ggMarginal(scatter, type = "histogram", fill = "dodgerblue"))
}
### b.)
# xbar, mean vector (computed over the numeric columns only)
# vapply() always returns a logical vector, unlike sapply(), whose return
# type depends on its input.
numeric_cols <- vapply(airpol, is.numeric, logical(1))
airpol_num <- airpol[numeric_cols]
colMeans(airpol_num)
## Wind..x1. Solar.Radiation..x2. CO.x3.
## 7.500000 73.857143 4.547619
## NO.x4. N02.x5. O3.x6.
## 2.190476 10.047619 9.404762
## HC.x7.
## 3.095238
# Sn, the variance-covariance matrix with divisor n: cov() divides by n - 1,
# so rescale by (n - 1) / n. Uses the same numeric columns as the mean vector.
n <- nrow(airpol_num)
cov(airpol_num) * (n - 1) / n
## Wind..x1. Solar.Radiation..x2. CO.x3. NO.x4.
## Wind..x1. 2.4404762 -2.7142857 -0.3690476 -0.4523810
## Solar.Radiation..x2. -2.7142857 293.3605442 3.8163265 -1.3537415
## CO.x3. -0.3690476 3.8163265 1.4858277 0.6575964
## NO.x4. -0.4523810 -1.3537415 0.6575964 1.1541950
## N02.x5. -0.5714286 6.6020408 2.2596372 1.0623583
## O3.x6. -2.1785714 30.0578231 2.7545351 -0.7913832
## HC.x7. 0.1666667 0.6088435 0.1383220 0.1723356
## N02.x5. O3.x6. HC.x7.
## Wind..x1. -0.5714286 -2.1785714 0.1666667
## Solar.Radiation..x2. 6.6020408 30.0578231 0.6088435
## CO.x3. 2.2596372 2.7545351 0.1383220
## NO.x4. 1.0623583 -0.7913832 0.1723356
## N02.x5. 11.0929705 3.0521542 1.0192744
## O3.x6. 3.0521542 30.2409297 0.5804989
## HC.x7. 1.0192744 0.5804989 0.4671202
# R, the correlation matrix of airpol, rounded to two decimal places
round(cor(airpol), digits = 2)
## Wind..x1. Solar.Radiation..x2. CO.x3. NO.x4. N02.x5.
## Wind..x1. 1.00 -0.10 -0.19 -0.27 -0.11
## Solar.Radiation..x2. -0.10 1.00 0.18 -0.07 0.12
## CO.x3. -0.19 0.18 1.00 0.50 0.56
## NO.x4. -0.27 -0.07 0.50 1.00 0.30
## N02.x5. -0.11 0.12 0.56 0.30 1.00
## O3.x6. -0.25 0.32 0.41 -0.13 0.17
## HC.x7. 0.16 0.05 0.17 0.23 0.45
## O3.x6. HC.x7.
## Wind..x1. -0.25 0.16
## Solar.Radiation..x2. 0.32 0.05
## CO.x3. 0.41 0.17
## NO.x4. -0.13 0.23
## N02.x5. 0.17 0.45
## O3.x6. 1.00 0.15
## HC.x7. 0.15 1.00
None of the variables are very highly correlated. The highest correlation in our data, 0.557 (0.56 in the rounded matrix above), is between carbon monoxide (CO) and nitrogen dioxide (NO2). Wind is negatively correlated with solar radiation and with every pollutant except hydrocarbons (HC), with which it has a weak positive correlation of 0.16.
#a.)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Eight paired observations (x1, x2), kept both as a data frame and as the
# stand-alone vectors x1 and x2.
mydata <- data.frame(
  x1 = c(-6, -3, -2, 1, 2, 5, 6, 8),
  x2 = c(-2, -3, 1, -1, 2, 1, 5, 3)
)
x1 <- mydata$x1
x2 <- mydata$x2
# Scatterplot of x2 against x1. The trace type and mode are stated explicitly
# so plotly does not have to infer them (which it reports with "No trace type
# specified" / "No scatter mode specifed" messages).
fig <- plot_ly(mydata, x = ~x1, y = ~x2, type = "scatter", mode = "markers")
fig
# Sn for the (x1, x2) sample: cov() divides by n - 1, so rescale to divisor n
n <- nrow(mydata)
myvarmat <- cov(mydata) * (n - 1) / n
myvarmat
## x1 x2
## x1 20.48438 9.09375
## x2 9.09375 6.18750
# Extract the entries by row/column name instead of by position in the
# underlying vector.
s11 <- myvarmat["x1", "x1"]
s12 <- myvarmat["x2", "x1"]
s22 <- myvarmat["x2", "x2"]
#b.)
# Rotate the coordinate axes (formula on pdf page 56):
#   x1~ =  x1 * cos(theta) + x2 * sin(theta)
#   x2~ = -x1 * sin(theta) + x2 * cos(theta)
cos_theta <- 0.899
sin_theta <- 0.438
x1tilde <- x1 * cos_theta + x2 * sin_theta
x2tilde <- -x1 * sin_theta + x2 * cos_theta
#c
# Variances of the rotated coordinates. var() divides by n - 1, so rescale by
# (n - 1) / n to use the same divisor-n convention as s11, s12 and s22, which
# part e relies on; mixing the two divisors makes the distances in parts d
# and e differ by a factor of sqrt(n / (n - 1)).
datatilde <- data.frame(x1tilde, x2tilde)
n <- nrow(datatilde)
vartilde <- diag(var(datatilde)) * (n - 1) / n
vartilde
## x1tilde x2tilde
## 24.904073 1.769002
#d
# Statistical distance from the origin to P = (4, -2), measured along the
# rotated axes. Uses the variances computed in part c directly instead of
# rounded, hand-copied values.
newx1 <- 4 * cos_theta + -2 * sin_theta
newx2 <- -4 * sin_theta + -2 * cos_theta
dOP <- sqrt(
  (newx1^2 / vartilde[["x1tilde"]]) + (newx2^2 / vartilde[["x2tilde"]])
)
dOP
## [1] 2.724179
#e
# Distance from P = (4, -2) to the origin through the quadratic form
# a11 * x1^2 + 2 * a12 * x1 * x2 + a22 * x2^2, with the coefficients taken
# from the footnote.
ct <- 0.899 # cos(theta)
st <- 0.438 # sin(theta)
# The two denominators shared by all three coefficients
denom1 <- ct^2 * s11 + 2 * st * ct * s12 + st^2 * s22
denom2 <- ct^2 * s22 - 2 * st * ct * s12 + st^2 * s11
a11 <- (ct^2 / denom1) + (st^2 / denom2)
a22 <- (st^2 / denom1) + (ct^2 / denom2)
a12 <- ((st * ct) / denom1) - ((st * ct) / denom2)
dOP2 <- sqrt((a11 * 16) + (2 * a12 * 4 * -2) + (a22 * 4))
dOP2
## [1] 2.724179
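Both calculations measure the same statistical distance from P to the origin, so the two values should agree up to rounding as long as the variances of the rotated coordinates in (d) and s11, s12, s22 in (e) are computed with the same divisor (n or n - 1).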
# Load the national track records for women and display the raw table.
# NOTE(review): this path spells the folder "Github" while the air-pollution
# load spells it "GitHub" - confirm which is right on a case-sensitive
# file system.
WomenTrack <- read.csv(
  file = "~/Github/STA135/Homework/HW1/National Track Records for Women.csv",
  header = TRUE
)
print(WomenTrack)
## Country X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min.
## 1 Argentina 11.57 22.94 52.50 2.05 4.25
## 2 Australia 11.12 22.23 48.63 1.98 4.02
## 3 Austria 11.15 22.70 50.62 1.94 4.05
## 4 Belgium 11.14 22.48 51.45 1.97 4.08
## 5 Bermuda 11.46 23.05 53.30 2.07 4.29
## 6 Brazil 11.17 22.60 50.62 1.97 4.17
## 7 Canada 10.98 22.62 49.91 1.97 4.00
## 8 Chile 11.65 23.84 53.68 2.00 4.22
## 9 China 10.79 22.01 49.81 1.93 3.84
## 10 Columbia 11.31 22.92 49.64 2.04 4.34
## 11 Cook Islands 12.52 25.91 61.65 2.28 4.82
## 12 Costa Rica 11.72 23.92 52.57 2.10 4.52
## 13 Czech Republic 11.09 21.97 47.99 1.89 4.03
## 14 Denmark 11.42 23.36 52.92 2.02 4.12
## 15 Domincan Republic 11.63 23.91 53.02 2.09 4.54
## 16 Finland 11.13 22.39 50.14 2.01 4.10
## 17 France 10.73 21.99 48.25 1.94 4.03
## 18 Germany 10.81 21.71 47.60 1.92 3.96
## 19 Great Britian 11.10 22.10 49.43 1.94 3.97
## 20 Greece 10.83 22.67 50.56 2.00 4.09
## 21 Guatemala 11.92 24.50 55.64 2.15 4.48
## 22 Hungary 11.41 23.06 51.50 1.99 4.02
## 23 India 11.56 23.86 55.08 2.10 4.36
## 24 Indonesia 11.38 22.82 51.05 2.00 4.10
## 25 Ireland 11.43 23.02 51.07 2.01 3.98
## 26 Israel 11.45 23.15 52.06 2.07 4.24
## 27 Italy 11.14 22.60 51.31 1.96 3.98
## 28 Japan 11.36 23.33 51.93 2.01 4.16
## 29 Kenya 11.62 23.37 51.56 1.97 3.96
## 30 Korea, South 11.49 23.80 53.67 2.09 4.24
## 31 Korea, North 11.80 25.10 56.23 1.97 4.25
## 32 Luxembourg 11.76 23.96 56.07 2.07 4.35
## 33 Malaysia 11.50 23.37 52.56 2.12 4.39
## 34 Mauritius 11.72 23.83 54.62 2.06 4.33
## 35 Mexico 11.09 23.13 48.89 2.02 4.19
## 36 Myanmar(Burma) 11.66 23.69 52.96 2.03 4.20
## 37 Netherlands 11.08 22.81 51.35 1.93 4.06
## 38 New Zealand 11.32 23.13 51.60 1.97 4.10
## 39 Norway 11.41 23.31 52.45 2.03 4.01
## 40 Papua New Guinea 11.96 24.68 55.18 2.24 4.62
## 41 Philippines 11.28 23.35 54.75 2.12 4.41
## 42 Poland 10.93 22.13 49.28 1.95 3.99
## 43 Portugal 11.30 22.88 51.92 1.98 3.96
## 44 Romania 11.30 22.35 49.88 1.92 3.90
## 45 Russia 10.77 21.87 49.11 1.91 3.87
## 46 Samoa 12.38 25.45 56.32 2.29 5.42
## 47 Singapore 12.13 24.54 55.08 2.12 4.52
## 48 Spain 11.06 22.38 49.67 1.96 4.01
## 49 Sweden 11.16 22.82 51.69 1.99 4.09
## 50 Switzerland 11.34 22.88 51.32 1.98 3.97
## 51 Taiwan 11.22 22.56 52.74 2.08 4.38
## 52 Thailand 11.33 23.30 52.60 2.06 4.38
## 53 Turkey 11.25 22.71 53.15 2.01 3.92
## 54 U.S.A. 10.49 21.34 48.83 1.94 3.95
## X3000m.min. Marathon
## 1 9.19 150.32
## 2 8.63 143.51
## 3 8.78 154.35
## 4 8.82 143.05
## 5 9.81 174.18
## 6 9.04 147.41
## 7 8.54 148.36
## 8 9.26 152.23
## 9 8.10 139.39
## 10 9.37 155.19
## 11 11.10 212.33
## 12 9.84 164.33
## 13 8.87 145.19
## 14 8.71 149.34
## 15 9.89 166.46
## 16 8.69 148.00
## 17 8.64 148.27
## 18 8.51 141.45
## 19 8.37 135.25
## 20 8.96 153.40
## 21 9.71 171.33
## 22 8.55 148.50
## 23 9.50 154.29
## 24 9.11 158.10
## 25 8.36 142.23
## 26 9.33 156.36
## 27 8.59 143.47
## 28 8.74 139.41
## 29 8.39 138.47
## 30 9.01 146.12
## 31 8.96 145.31
## 32 9.21 149.23
## 33 9.31 169.28
## 34 9.24 167.09
## 35 8.89 144.06
## 36 9.08 158.42
## 37 8.57 143.43
## 38 8.76 146.46
## 39 8.53 141.06
## 40 10.21 221.14
## 41 9.81 165.48
## 42 8.53 144.18
## 43 8.50 143.29
## 44 8.36 142.50
## 45 8.38 141.31
## 46 13.12 191.58
## 47 9.94 154.41
## 48 8.48 146.51
## 49 8.81 150.39
## 50 8.60 145.51
## 51 9.63 159.53
## 52 10.07 162.39
## 53 8.53 151.43
## 54 8.43 141.16
# First, convert columns 5 through 8 (the last four) into seconds by
# multiplying by 60
WomenTrack[5:8] <- WomenTrack[5:8] * 60
print(WomenTrack)
## Country X100m.s. X200m.s. X400m.s. X800m.min. X1500m.min.
## 1 Argentina 11.57 22.94 52.50 123.0 255.0
## 2 Australia 11.12 22.23 48.63 118.8 241.2
## 3 Austria 11.15 22.70 50.62 116.4 243.0
## 4 Belgium 11.14 22.48 51.45 118.2 244.8
## 5 Bermuda 11.46 23.05 53.30 124.2 257.4
## 6 Brazil 11.17 22.60 50.62 118.2 250.2
## 7 Canada 10.98 22.62 49.91 118.2 240.0
## 8 Chile 11.65 23.84 53.68 120.0 253.2
## 9 China 10.79 22.01 49.81 115.8 230.4
## 10 Columbia 11.31 22.92 49.64 122.4 260.4
## 11 Cook Islands 12.52 25.91 61.65 136.8 289.2
## 12 Costa Rica 11.72 23.92 52.57 126.0 271.2
## 13 Czech Republic 11.09 21.97 47.99 113.4 241.8
## 14 Denmark 11.42 23.36 52.92 121.2 247.2
## 15 Domincan Republic 11.63 23.91 53.02 125.4 272.4
## 16 Finland 11.13 22.39 50.14 120.6 246.0
## 17 France 10.73 21.99 48.25 116.4 241.8
## 18 Germany 10.81 21.71 47.60 115.2 237.6
## 19 Great Britian 11.10 22.10 49.43 116.4 238.2
## 20 Greece 10.83 22.67 50.56 120.0 245.4
## 21 Guatemala 11.92 24.50 55.64 129.0 268.8
## 22 Hungary 11.41 23.06 51.50 119.4 241.2
## 23 India 11.56 23.86 55.08 126.0 261.6
## 24 Indonesia 11.38 22.82 51.05 120.0 246.0
## 25 Ireland 11.43 23.02 51.07 120.6 238.8
## 26 Israel 11.45 23.15 52.06 124.2 254.4
## 27 Italy 11.14 22.60 51.31 117.6 238.8
## 28 Japan 11.36 23.33 51.93 120.6 249.6
## 29 Kenya 11.62 23.37 51.56 118.2 237.6
## 30 Korea, South 11.49 23.80 53.67 125.4 254.4
## 31 Korea, North 11.80 25.10 56.23 118.2 255.0
## 32 Luxembourg 11.76 23.96 56.07 124.2 261.0
## 33 Malaysia 11.50 23.37 52.56 127.2 263.4
## 34 Mauritius 11.72 23.83 54.62 123.6 259.8
## 35 Mexico 11.09 23.13 48.89 121.2 251.4
## 36 Myanmar(Burma) 11.66 23.69 52.96 121.8 252.0
## 37 Netherlands 11.08 22.81 51.35 115.8 243.6
## 38 New Zealand 11.32 23.13 51.60 118.2 246.0
## 39 Norway 11.41 23.31 52.45 121.8 240.6
## 40 Papua New Guinea 11.96 24.68 55.18 134.4 277.2
## 41 Philippines 11.28 23.35 54.75 127.2 264.6
## 42 Poland 10.93 22.13 49.28 117.0 239.4
## 43 Portugal 11.30 22.88 51.92 118.8 237.6
## 44 Romania 11.30 22.35 49.88 115.2 234.0
## 45 Russia 10.77 21.87 49.11 114.6 232.2
## 46 Samoa 12.38 25.45 56.32 137.4 325.2
## 47 Singapore 12.13 24.54 55.08 127.2 271.2
## 48 Spain 11.06 22.38 49.67 117.6 240.6
## 49 Sweden 11.16 22.82 51.69 119.4 245.4
## 50 Switzerland 11.34 22.88 51.32 118.8 238.2
## 51 Taiwan 11.22 22.56 52.74 124.8 262.8
## 52 Thailand 11.33 23.30 52.60 123.6 262.8
## 53 Turkey 11.25 22.71 53.15 120.6 235.2
## 54 U.S.A. 10.49 21.34 48.83 116.4 237.0
## X3000m.min. Marathon
## 1 551.4 9019.2
## 2 517.8 8610.6
## 3 526.8 9261.0
## 4 529.2 8583.0
## 5 588.6 10450.8
## 6 542.4 8844.6
## 7 512.4 8901.6
## 8 555.6 9133.8
## 9 486.0 8363.4
## 10 562.2 9311.4
## 11 666.0 12739.8
## 12 590.4 9859.8
## 13 532.2 8711.4
## 14 522.6 8960.4
## 15 593.4 9987.6
## 16 521.4 8880.0
## 17 518.4 8896.2
## 18 510.6 8487.0
## 19 502.2 8115.0
## 20 537.6 9204.0
## 21 582.6 10279.8
## 22 513.0 8910.0
## 23 570.0 9257.4
## 24 546.6 9486.0
## 25 501.6 8533.8
## 26 559.8 9381.6
## 27 515.4 8608.2
## 28 524.4 8364.6
## 29 503.4 8308.2
## 30 540.6 8767.2
## 31 537.6 8718.6
## 32 552.6 8953.8
## 33 558.6 10156.8
## 34 554.4 10025.4
## 35 533.4 8643.6
## 36 544.8 9505.2
## 37 514.2 8605.8
## 38 525.6 8787.6
## 39 511.8 8463.6
## 40 612.6 13268.4
## 41 588.6 9928.8
## 42 511.8 8650.8
## 43 510.0 8597.4
## 44 501.6 8550.0
## 45 502.8 8478.6
## 46 787.2 11494.8
## 47 596.4 9264.6
## 48 508.8 8790.6
## 49 528.6 9023.4
## 50 516.0 8730.6
## 51 577.8 9571.8
## 52 604.2 9743.4
## 53 511.8 9085.8
## 54 505.8 8469.6
#Second, divide each column by respective meters to get meters per second